import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
import random
from scipy.stats import multivariate_normal
data = pd.read_excel("Data for risk and hedging assignment.xlsx", index_col=0, names=['Day','gas','electricity'])
# Changing units from pences to pounds
data.gas = data.gas/100
data.head()
In this part I will estimate the average daily profit over the 30-day period, based on the market prices for gas and
electricity from the given dataset for November 2019.
# Copy of the dataset
df_part1 = data.copy()
# Gas consumption with contract price
df_part1['Gas cost (contract)'] = 125000
# Gas consumption with market price
df_part1['Gas cost (market)'] = (578671.80-250000)*df_part1.gas
# Operation daily cost
df_part1['Operation cost'] = 140000
# Total Cost
df_part1['Total Cost'] = df_part1['Gas cost (contract)'] + df_part1['Gas cost (market)'] + df_part1['Operation cost']
# Sales
df_part1['Sales'] = (8399999.85/1000)*df_part1.electricity
# Profit
df_part1['Profit'] = df_part1['Sales']-df_part1['Total Cost']
df_part1.round(2).style.format("{:,}")
print('Average daily profit = ',df_part1.Profit.mean().round(2),'GBP')
print('Price volatility = ',df_part1.Profit.std().round(2),'GBP')
print('Profit for the whole period = ',df_part1.Profit.sum().round(2),'GBP')
In part 2, I will estimate the 95% expected shortfall for daily profit, based on the market prices for gas and
electricity from the given dataset for November 2019. I will use bivariate normal distribution, with means and covariances taken from the dataset.
I will check for statistical parameters from a given dataset: means and covariances, to use it in creating bivariate normal distribution.
# Means for gas and electricity prices
means = (data.gas.mean(),data.electricity.mean())
print('Gas price mean:', means[0].round(2),'GBP')
print('Electricity price mean:', means[1].round(2),'GBP')
# Covariance matrix of gas and electricity variables
z = np.array([data.gas, data.electricity])
covMatrix = np.cov(z, bias=True) # with switched bias function to normalize values
pd.DataFrame(covMatrix,columns=['Gas','Electricity'])
print('Covariance (gas,electricity) = ',covMatrix[0][1])
I will generate 10'000 observations from bivariate normal distribution, with given parameters, to create the sample.
# Creating bivariate normal distribution with parameters from the dataset
mn = multivariate_normal(mean=means, cov=covMatrix)
# N - Number of observations
N = 10000
# Creating a sample of 10'000 observations from bivariate normal distribution with seed (random_state) for repeatable results
sample = mn.rvs(size=N, random_state=888)
pd.DataFrame(sample,columns=['Gas Price /therm [GBP]','Electricity Price /MWh [GBP]']).round(2)
# Saving the sample for marker's review
pd.DataFrame(sample,columns=['Gas Price /therm [GBP]','Electricity Price /MWh [GBP]']).to_csv("sample_bivariate.csv")
# Visualization of the sample's distribution
sns.jointplot(x=sample[:,0],
y=sample[:,1],
kind="kde",
space=0);
# Creating dataframe from the sample
df = pd.DataFrame(sample,columns=['gas','electricity'])
# Gas consumption with contract price
df['Gas cost (contract)']=125000
# Gas consumption with market price
df['Gas cost (market)']= (578671.80-250000)*df.gas
# Operation daily cost
df['Operation cost']=140000
# Total Cost
df['Total Cost']=df['Gas cost (contract)']+df['Gas cost (market)']+df['Operation cost']
# Sales
df['Sales']=(8399999.85/1000)*df.electricity
# Profit
df['Profit']=df['Sales']-df['Total Cost']
# Loss - negative Profit for ES calculations
df['Loss']= -df['Profit']
df.head().round(2).style.format("{:,}")
For Value at Risk at alpha = 95% level, I will use Percent Point Function (PPF), inverse of Cumulative Distribution Function (CDF), from the sample's Loss values parameters - mean and standard deviation (std).
alpha = 0.95
VaR_2 = norm.ppf(alpha, np.mean(df.Loss), np.std(df.Loss))
print('Loss Value at Risk at 95% confidence level:', f"{round(VaR_2,2):,}",'GBP')
I will calculate 95% Expected Shortfall in two ways: discrete (empirical values from the sample Loss values) and continuous (from Probability Distribution Function, created with parameters from the sample Loss values). We will see that these two approaches give similar results, with difference due to normal distribution approximation.
# Discrete approach - average all Losses greater than 95% VaR
ES_empirical_2 = df[df.Loss>=VaR_2]['Loss'].mean()
print('Expected Shortfall at 95% confidence level from sample:', f"{round(ES_empirical_2,2):,}",'GBP')
# Continuous approach - calculating expected
alpha = 0.05
ES_cont_2 = np.mean(df.Loss) + np.std(df.Loss) * norm.pdf(norm.ppf(1-alpha))/alpha
print('Expected Shortfall at 95% confidence level from normal distribution function approximation:',f"{round(ES_cont_2,2):,}",'GBP')
# Visualization of sample Loss disitrbution
mean = np.mean(df.Loss)
std_dev = np.std(df.Loss)
df.Loss.hist(bins=1000, normed=True, histtype='stepfilled', alpha=0.5)
x = np.linspace(mean - 3*std_dev, mean + 3*std_dev, 100)
plt.plot(x,norm.pdf(x, mean, std_dev), "black" )
plt.title('Sample\'s Losses distribution')
plt.xlabel('Loss GBP')
plt.ylabel('Probaility')
plt.axvline(VaR_2,color='r')
plt.axvline(ES_empirical_2,color='g')
plt.text(VaR_2+3000,0.00001,'VaR 95 = '+str("{:,}".format(int(VaR_2))),rotation='vertical',color='r')
plt.text(ES_empirical_2+3000,0.00001,'ES 95 = '+str("{:,}".format(int(ES_empirical_2))),rotation='vertical',color='g')
plt.show()
The difference between empirical (ES_empirical) and continuous (ES_cont) Expected Shortfall figures is due to normal distribution approximations. However, we see that the Expected Shortfall Loss will be on average 71'843.97 GBP.
In part 3 of the assignment, I use simulation approach, like in Part 2, to generate observations from bivariate normal distribution, with given mean and covariances parameters. Next, I modified 4% of all observations (400 days) to include generator failure (half production capacity). I choose fixed number of the sample failures, so variant B from the assignment guidelines. I have changed following parameters for failure days:
# Generate 4% unique random numbers within a range 0 to N - each number correspond to a failure day in the sample
random.seed(888)
failure_days = random.sample(range(0, N), int(N/25))
# Copy of the sample from Part 2
df_with_failures = df.copy()
# Modifying sample to include 400 failure days (4% of total observations)
for i in failure_days:
df_with_failures.at[i,'Gas cost (market)']=((578671.80*0.5)-250000)*df_with_failures.iloc[i]['gas']
df_with_failures.at[i,'Total Cost']=df_with_failures.iloc[i]['Gas cost (contract)']+df_with_failures.iloc[i]['Gas cost (market)']+df_with_failures.iloc[i]['Operation cost']
df_with_failures.at[i,'Sales']=(8399999.85/2/1000)*df_with_failures.iloc[i]['electricity']
df_with_failures.at[i,'Profit']=df_with_failures.iloc[i]['Sales']-df_with_failures.iloc[i]['Total Cost']
df_with_failures['Loss']=-df_with_failures['Profit']
df_with_failures
alpha = 0.95
VaR_3 = norm.ppf(alpha, np.mean(df_with_failures.Loss), np.std(df_with_failures.Loss))
print('Loss Value at Risk at 95% confidence level:', f"{round(VaR_3,2):,}",'GBP')
# Discrete approach - average all Losses greater than 95% VaR
ES_empirical_3 = df_with_failures[df_with_failures.Loss>=VaR_3]['Loss'].mean()
print('Expected Shortfall at 95% confidence level from sample:', f"{round(ES_empirical_3,2):,}",'GBP')
# Continuous approach - calculating expected
alpha = 0.05
ES_cont_3 = np.mean(df_with_failures.Loss) + np.std(df_with_failures.Loss) * norm.pdf(norm.ppf(1-alpha))/alpha
print('Expected Shortfall at 95% confidence level from normal distribution function approximation:',f"{round(ES_cont_3,2):,}",'GBP')
The difference between empirical (ES_empirical) and continuous (ES_cont) Expected Shortfall figures is due to normal distribution approximations. However, we see that the Expected Shortfall Loss, for a sample with 4% of failure days, will by average 81'254.42 GBP.
mean = np.mean(df_with_failures.Profit)
std_dev = np.std(df_with_failures.Profit)
df_with_failures.Profit.hist(bins=1000, normed=True, histtype='stepfilled', alpha=0.5)
x = np.linspace(mean - 3*std_dev, mean + 3*std_dev, 100)
plt.plot(x, norm.pdf(x, mean, std_dev), "black" )
plt.title('Sample\'s Losses distribution (with failure days)')
plt.xlabel('Loss GBP')
plt.ylabel('Probaility')
plt.axvline(VaR_3,color='r')
plt.axvline(ES_empirical_3,color='g')
plt.text(VaR_3+3000,0.00001,'VaR = '+str("{:,}".format(int(VaR_3))),rotation='vertical',color='r')
plt.text(ES_empirical_3+3000,0.00001,'ES = '+str("{:,}".format(int(ES_empirical_3))),rotation='vertical',color='g')
plt.show()
We have obtained VaR 95% of 64,388.32 GBP and Expected Shortfall 95% of 81'254.42 GBP. The numbers makes sense in comparison to the sample without failure days - we expected increase in both VaR and ES in part 2, as failure days in the new sample generate more losses.
In part 4 of the assignment, I will minimize Expected Shortfall, by hedging with Contracts for Differences for gas and electricity. To calculate optimal quantities for CFDs, I will search in feasible region (i.e. all allowed CFD quantities combinations) and pick quantities, which yield minimum Expected Shortfall value. I will use the grid search algorithm, presented below as pseudocode.
QG in range (0, max energy production 8`400 MWh )
QE in range (0, max consumption of gas (578`671.80 therms/day)
for x in in QG:
for y in QE:
calculate Expected Shortfall(x,y)
Find min(Expected Shortfall) and its corresponding (x,y) = (CFD_GAS_QTY,CFD_ELEC_QTY)
df_part4 = df_with_failures.copy()
df_part4['Gas Qty'] = 578671.80
df_part4['Elec Qty'] = 8400
df_part4.head().round(2).style.format("{:,}")
for i in failure_days:
df_part4.at[i,'Elec Qty'] = 8400/2
df_part4.at[i,'Gas Qty'] = 578671.80/2
df_part4['Total_Cost/MWh'] = df_part4['Total Cost']/df_part4['Elec Qty']
df_part4.head(5).round(2).style.format("{:,}")
I used grid search approach, so looking for all possible combinations of quantities for Gas and Electricity contracts to find minimum Expected Shortage figure.
# Strike prices for CFDs
Q_strike_Gas = 0.43 # 0.43 GBP per gas therm
Q_strike_Elec = 49 # 49 GBP per MWh
# Feasible region in minimization problem
# Quantity range of CFD for gas - <0,587'671>
Q_G = range(0,587671,500)
# Quantity range of CFD for electricity - <0,8'400>
Q_E = range(0,8400,50)
# Confidence level for 95% Expected Shortfall
alpha = 0.05
# List for Expected Shortfall values for every CFDs combination
es = []
for qg in Q_G:
for qe in Q_E:
# Profit from CFD for Electricity with given quantity qe
df_part4['Profit CFD Elec'] = (Q_strike_Elec - df_part4['electricity']) * qe
# Profit from CFD for Gas with given quantity qg
df_part4['Profit CFD Gas'] = (df_part4.gas - Q_strike_Gas) * qg
df_part4['Total_Profit'] = df_part4.Profit + df_part4['Profit CFD Elec'] + df_part4['Profit CFD Gas']
df_part4['Total_Loss'] = -df_part4['Total_Profit']
es.append((norm.pdf(norm.ppf(alpha))* 1/alpha * np.std(df_part4.Total_Loss) - np.mean(df_part4.Total_Loss), qg, qe))
# Saving Expected Shortfall results to csv file
pd.DataFrame(es,columns=['ES','Qty_CFD_Gas','Qty_CFD_Electricity']).to_csv('min_es.csv')
df_es = pd.DataFrame(es,columns=['ES','Q_G','Q_E'])
min_ES = df_es[df_es.ES==df_es['ES'].min()].iloc[0][0].round(2)
CFD_Gas_Qty = df_es[df_es.ES==df_es['ES'].min()].iloc[0][1].round(2)
CFD_Electricity_Qty = df_es[df_es.ES==df_es['ES'].min()].iloc[0][2]
print('Minimum Expected Shortfall = ', f"{min_ES:,}",'GBP')
print('CFD Gas Qty for min Expected Shortfall = ', f"{CFD_Gas_Qty:,}",'gas therms')
print('CFD Electricity Qty for min Expected Shortfall = ', f"{CFD_Electricity_Qty:,}",'MWh')
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.tri as mtri
triang = mtri.Triangulation(df_es['Q_G'], df_es['Q_E'])
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_trisurf(triang, df_es['ES'], cmap='RdYlBu', linewidth=1)
ax.view_init(elev=20, azim=50)
ax.dist = 10
ax.set_xlabel('\n\n\nGas [therms]')
ax.set_ylabel('\n\n\nElectricity [MWh]')
ax.set_zlabel('\n\nExcpeted Shortfall [GBP]')
fig.colorbar(surf, shrink=0.5, aspect=10)
fig.suptitle('Optimisation for Expected Shortfall')
plt.show()
The below 3D graph generated in plotly, allows to rotate the figure and check where exactly the global minimum is. However, it can be only generated dynamically in Python or jupyter notebook, therefore I include a photo.
import plotly.graph_objects as go
min_ES_index = df_es[df_es.ES==df_es['ES'].min()].index[0]
fig = go.Figure(data=[go.Scatter3d(z=df_es['ES'],
x=df_es['Q_G'],
y=df_es['Q_E'],
mode='markers',
marker=dict(size=1, color=df_es["ES"])),
# Min point
go.Scatter3d(z=[df_es['ES'][min_ES_index]],
x=[df_es['Q_G'][min_ES_index]],
y=[df_es['Q_E'][min_ES_index]],
mode='markers',
text='Minimum Expected Shortfall',
marker=dict(size=10))])
fig.update_layout(title='Optimization for Expected Shortfall',
autosize=True,
width=800, height=800,
xaxis=dict(title_text="x"),
yaxis=dict(title_text="Electricity CFD Qty"))
fig.show()